This code produces the results for the question: Do features of caregiver verbal engagement vary as a function of language group and activity?

Load libraries and set theme

# Data wrangling + plotting (tidyverse), plot arrangement (ggpubr),
# descriptives (psych), mixed models (lme4) with Satterthwaite p-values
# (lmerTest), estimated marginal means (emmeans), and model/diagnostic
# plotting (sjPlot, ggeffects).
library(tidyverse)
library(ggpubr)
library(psych)
library(lme4)
library(lmerTest)
library(emmeans)
library(sjPlot)
library(ggeffects)

# black-and-white theme for all ggplots in this document
theme_set(theme_bw())

# Extract the legend ("guide-box") grob from a ggplot so it can be reused,
# e.g. drawn once beside a multi-panel figure.
#
# Args:
#   myggplot: a ggplot object that has a visible legend.
# Returns:
#   The legend grob.
# Improvements over the original: vapply() instead of type-unstable sapply(),
# and an informative error when the plot has no legend (previously failed
# with an opaque subscript-out-of-bounds error because which() returned
# integer(0)).
get_legend <- function(myggplot) {
  tmp <- ggplot_gtable(ggplot_build(myggplot))
  leg <- which(vapply(tmp$grobs, function(x) x$name, character(1)) == "guide-box")
  if (length(leg) == 0) {
    stop("get_legend(): plot has no legend (no 'guide-box' grob found)", call. = FALSE)
  }
  tmp$grobs[[leg[1]]]
}

Read in data and convert

# demographics
# The English and Spanish demographic files use different column names for
# hearing status and maternal education, so each is renamed to a common
# schema (id, hi, momed) before stacking.
demo_english <- read_csv("./data_demo_lena_transcripts/demo_english_ms.csv") %>% 
  rename(id = ID, hi = HI24Final, momed = Momed) %>% 
  dplyr::select(id, hi, momed) %>% 
  mutate(id = as.character(id), 
         language = "english")

demo_spanish <- read_csv("./data_demo_lena_transcripts/demo_spanish_ms.csv") %>% 
  rename(id = ID, hi = HI_18, momed = MomEd_25m) %>% 
  dplyr::select(id, hi, momed) %>% 
  mutate(id = as.character(id), 
         language = "spanish")

# bind_rows() matches columns by name, whereas rbind() silently relies on
# identical column order; same result here, but robust to future edits of
# either select() call above
demo <- bind_rows(demo_english, demo_spanish)



# NOTE about periods of non-tCDS
# gemods refers to when there are designated start/end periods of other-directed speech (ODS); this was captured using gems (@G) using CHAT conventions
# kwalods refers to when ODS was transcribed at an utterance-level within a tCDS activity period between caregiver and child (e.g., other-directed speech in the background); this was captured per utterances using CHAT postcodes
## for tokens/min and types/min, we do not include ODS that occurred within a period of tCDS, because durations were captured by activity and not by utterance
## for mlu, we include all ODS across gemods and kwalods


# NOTE about speech == "all"
# "speech" includes two levels: all, spont
# all = refers to all speech by caregivers
# spont = refers to only speech by caregivers that was considered spontaneous rather than recited (e.g., reading book text, singing memorized common songs like itsy bitsy spider); therefore, 'spont' is a subset of 'all'


# freq
# Tokens/min and types/min per id x activity x segment x speaker.
# kwalods rows are dropped here because durations were captured per activity,
# not per utterance (see NOTE above); only "all" caregiver speech is kept.
freq <- read_csv("./data_demo_lena_transcripts/freq.csv") %>% 
  filter(activity != "kwalods", 
         speech == "all") %>% 
  mutate(activity = recode(activity, "play" = "cc", "conv" = "cc", 
                           "food" = "cc", "routines" = "cc", "gemods" = "non_tcds")) %>% # we collapsed across play, routines, feeding, and unstructured conversation for the final paper [see supplemental for all categories]
  group_by(id, activity, segment_num, speaker) %>% 
  # within each group: durations and tokens are summed across the rows being
  # collapsed; types are averaged -- presumably because summing would
  # double-count word types shared across collapsed activities (TODO confirm)
  mutate(dur_min2 = sum(dur_min),
         tokens2 = sum(tokens), 
         types2 = mean(types)) %>% 
  # distinct() collapses the duplicated per-group rows produced by mutate()
  distinct(id, activity, language, speaker, segment_num, tokens2, types2, dur_min2) %>% 
  # rates per minute within the collapsed activity
  mutate(tokens_permin2 = tokens2/dur_min2, 
         types_permin2 = types2/dur_min2) %>% 
  distinct(id, activity, language, speaker, segment_num, tokens_permin2, types_permin2) %>% 
  ungroup() %>% 
  # fixed factor orders so books is the reference level in models/plots
  mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds")), 
         id = factor(id), 
         language = factor(language)) 


# mlu
# Mean length of utterance in words (MLUw) per id x activity x segment x
# speaker: word and utterance counts are summed within each group after
# collapsing activity categories, then MLUw = total words / total utterances.
mlu <- read_csv("./data_demo_lena_transcripts/mlu.csv") %>% 
  filter(speech == "all") %>% 
  # collapse play/conversation/feeding/routines into one child-centered
  # ("cc") category; "ods" becomes non_tcds (ODS is included for MLU -- see
  # NOTE near the top of this file)
  mutate(activity = recode(activity, "play" = "cc", "conv" = "cc", 
                           "food" = "cc", "routines" = "cc", "ods" = "non_tcds")) %>% 
  group_by(id, activity, segment_num, speaker) %>% 
  mutate(words_sum2 = sum(words_sum),
         num_utt_sum2 = sum(num_utt_sum)) %>% 
  distinct(id, activity, language, speaker, segment_num, words_sum2, num_utt_sum2) %>% 
  # NOTE: distinct() retains the existing grouping, so the identical
  # group_by() the original re-issued here was a no-op and has been removed
  mutate(mlu_w2 = words_sum2/num_utt_sum2) %>% 
  distinct(id, activity, language, speaker, segment_num, mlu_w2) %>% 
  ungroup() %>% 
  mutate(activity = factor(activity, levels = c("books", "cc", "ac", "non_tcds")), 
         id = factor(id), 
         language = factor(language))


# chip
# this includes only caregivers, therefore there is no speaker column
# we exclude periods of ODS because this is about responsiveness to the child during periods of tCDS
# Caregiver responsiveness per id x activity x segment:
#   prop_adultresp2     = adult responses / child utterances
#   prop_adult_imitexp2 = adult imitations+expansions / child utterances
# (values can exceed 1 -- see str() output below -- so these are rates per
# child utterance rather than true proportions)
chip <- read_csv("./data_demo_lena_transcripts/chip.csv") %>% 
  filter(activity != "ods") %>% 
  mutate(activity = recode(activity, "play" = "cc", "conv" = "cc", 
                           "food" = "cc", "routines" = "cc")) %>% 
  group_by(id, activity, segment_num) %>% 
  # sum counts across the collapsed activity rows within each group
  mutate(total_adult_resp2 = sum(total_adult_resp),
         total_adult_imitexp2 = sum(total_adult_imitexp), 
         total_child_utt2 = sum(total_child_utt)) %>% 
  distinct(id, activity, language, segment_num, total_adult_resp2, total_adult_imitexp2, total_child_utt2) %>%
  mutate(prop_adultresp2 = total_adult_resp2/total_child_utt2, 
         prop_adult_imitexp2 = total_adult_imitexp2/total_child_utt2) %>% 
  distinct(id, activity, segment_num, language, prop_adultresp2, prop_adult_imitexp2) %>% 
  ungroup() %>% 
  # no non_tcds level here: ODS periods were excluded above
  mutate(activity = factor(activity, levels = c("books", "cc", "ac")), 
         id = factor(id), 
         language = factor(language))
  
  

# structure checks: confirm column types, factor levels, and row counts of
# the three wrangled data frames before modeling
str(freq)
## tibble[,7] [2,870 × 7] (S3: tbl_df/tbl/data.frame)
##  $ id            : Factor w/ 90 levels "7292","7352",..: 47 47 47 47 50 50 52 52 52 52 ...
##  $ activity      : Factor w/ 4 levels "books","cc","ac",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ speaker       : chr [1:2870] "CHI" "ADULTS" "CHI" "ADULTS" ...
##  $ segment_num   : num [1:2870] 12 12 15 15 2 2 11 11 5 5 ...
##  $ language      : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ tokens_permin2: num [1:2870] 8.46 42.57 5.32 21.75 12.31 ...
##  $ types_permin2 : num [1:2870] 4.79 19.73 2.59 9.89 3.61 ...
str(mlu)
## tibble[,6] [2,594 × 6] (S3: tbl_df/tbl/data.frame)
##  $ id         : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ activity   : Factor w/ 4 levels "books","cc","ac",..: 3 3 2 2 4 4 3 3 2 2 ...
##  $ speaker    : chr [1:2594] "ADULTS" "CHI" "ADULTS" "CHI" ...
##  $ segment_num: num [1:2594] 2 2 2 2 2 2 3 3 3 3 ...
##  $ language   : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ mlu_w2     : num [1:2594] 3.18 1.89 2.84 1.73 5.5 ...
str(chip)
## tibble[,6] [899 × 6] (S3: tbl_df/tbl/data.frame)
##  $ activity           : Factor w/ 3 levels "books","cc","ac": 3 2 3 2 3 2 2 3 2 3 ...
##  $ id                 : Factor w/ 90 levels "7292","7352",..: 46 46 46 46 46 46 46 46 46 46 ...
##  $ language           : Factor w/ 2 levels "english","spanish": 1 1 1 1 1 1 1 1 1 1 ...
##  $ segment_num        : num [1:899] 2 2 3 3 4 4 5 7 7 8 ...
##  $ prop_adultresp2    : num [1:899] 1.35 1.57 1.43 1.65 2.14 ...
##  $ prop_adult_imitexp2: num [1:899] 0.391 0.418 0.463 0.35 0.643 ...

Create dfs for ADULTS and CHI

# FREQ: keep only the caregiver rows (tokens/types rates)
freq_adult <- filter(freq, speaker == "ADULTS")


# MLU: keep only the caregiver rows (MLUw)
mlu_adult <- filter(mlu, speaker == "ADULTS")

Prep data for mixed models

Create tokens rate per hour - Children

# Child tokens per hour per participant, used as a covariate in all mixed
# models: total CHI tokens across every activity / total recording hours.
freq_hr_child <- read_csv("./data_demo_lena_transcripts/freq.csv") %>% 
  dplyr::select(-X1) %>%   # drop X1 -- presumably an exported row-index column (confirm)
  filter(speech == "all", 
         speaker == "CHI") %>% 
  group_by(id) %>% 
  # one rate per child, pooled over all activities and segments
  mutate(tokens_sum_child = sum(tokens), 
         dur_hr = sum(dur_min)/60, 
         tokens_hr_child = tokens_sum_child/dur_hr) %>% 
  distinct(id, language, tokens_hr_child) %>% 
  ungroup() %>% 
  # character id to match the join keys used below
  mutate(id = as.character(id))

Merge freq_adult, child tokens per hour, and demographic info

# freq
# caregiver rates + child tokens/hr covariate + demographics (hi, momed)
freq_all_mm <- freq_adult %>% 
  mutate(id = as.character(id)) %>%   # factor -> character so the join keys match
  left_join(freq_hr_child, by = c("id", "language")) %>% 
  left_join(demo, by = c("id", "language"))


# mlu
# caregiver MLUw + child tokens/hr covariate + demographics
mlu_all_mm <- mlu_adult %>% 
  mutate(id = as.character(id)) %>% 
  left_join(freq_hr_child, by = c("id", "language")) %>% 
  left_join(demo, by = c("id", "language"))



# chip
# caregiver responsiveness + child tokens/hr covariate + demographics
chip_mm <- chip %>% 
  mutate(id = as.character(id)) %>% 
  left_join(freq_hr_child, by = c("id", "language")) %>% 
  left_join(demo, by = c("id", "language"))

Mixed models - 4 categories

TOKENS x ACTIVITY - BOTH - ALL TALK

# # remove outlier - still get interaction
# freq_all_mm2 <- freq_all_mm %>% 
#   filter(tokens_permin_avg < 530)


# comparing models
# singular fit for MLU with segment_num as random intercept, so removed this for all models
# Main-effects model: child tokens/hr covariate + language + activity, with
# a random intercept per participant. Fit with ML (REML = FALSE, spelled
# out rather than the reassignable alias F) so the two models can be
# compared with a likelihood-ratio test.
m1_both_tokens_all <- lmer(tokens_permin2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

# adds the language x activity interaction
m2_both_tokens_all <- lmer(tokens_permin2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

# see if adding the intx adds
# likelihood-ratio test (both models ML-fitted): does the language x
# activity interaction improve fit over the main-effects model?
anova(m1_both_tokens_all, m2_both_tokens_all)
## Data: freq_all_mm
## Models:
## m1_both_tokens_all: tokens_permin2 ~ tokens_hr_child + language + activity + (1 | 
## m1_both_tokens_all:     id)
## m2_both_tokens_all: tokens_permin2 ~ tokens_hr_child + (language * activity) + (1 | 
## m2_both_tokens_all:     id)
##                    npar   AIC   BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m1_both_tokens_all    8 14508 14550 -7246.2    14492                       
## m2_both_tokens_all   11 14504 14562 -7241.3    14482 9.8635  3    0.01976 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# Type III F-tests (Satterthwaite df, via lmerTest) for the interaction model
anova(m2_both_tokens_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                   Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## tokens_hr_child    18563   18563     1   89.39 14.1533 0.0003007 ***
## language           19512   19512     1  131.94 14.8765 0.0001787 ***
## activity          333895  111298     3 1379.70 84.8594 < 2.2e-16 ***
## language:activity  12982    4327     3 1379.74  3.2994 0.0197350 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans
# interaction - post hoc tests of simple effects
# simple1: activity contrasts within each language;
# simple2: language contrasts within each activity
m2_both_tokens_all_emmeans_simple1 <- emmeans(m2_both_tokens_all, ~ activity | language)
m2_both_tokens_all_emmeans_simple2 <- emmeans(m2_both_tokens_all, ~ language | activity)

m2_both_tokens_all_emmeans_simple1
## language = english:
##  activity emmean   SE   df lower.CL upper.CL
##  books      91.6 5.88 1088     80.1    103.1
##  cc         66.2 3.18  233     59.9     72.4
##  ac         75.5 3.46  315     68.7     82.3
##  non_tcds   36.5 3.07  205     30.4     42.5
## 
## language = spanish:
##  activity emmean   SE   df lower.CL upper.CL
##  books      67.8 6.59 1189     54.8     80.7
##  cc         52.9 3.28  261     46.4     59.4
##  ac         57.6 3.46  312     50.8     64.4
##  non_tcds   31.7 3.07  206     25.6     37.7
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# pool both sets of simple-effect contrasts so the Bonferroni adjustment
# spans the full family of 16 tests
rbind(pairs(m2_both_tokens_all_emmeans_simple1), pairs(m2_both_tokens_all_emmeans_simple2))
##  language activity contrast          estimate   SE   df t.ratio p.value
##  english  .        books - cc           25.41 5.97 1407  4.260  0.0003 
##  english  .        books - ac           16.06 6.17 1419  2.602  0.1499 
##  english  .        books - non_tcds     55.11 5.91 1408  9.323  <.0001 
##  english  .        cc - ac              -9.35 3.62 1376 -2.582  0.1589 
##  english  .        cc - non_tcds        29.70 3.24 1357  9.172  <.0001 
##  english  .        ac - non_tcds        39.05 3.52 1368 11.103  <.0001 
##  spanish  .        books - cc           14.87 6.75 1427  2.203  0.4435 
##  spanish  .        books - ac           10.17 6.87 1433  1.480  1.0000 
##  spanish  .        books - non_tcds     36.12 6.63 1424  5.450  <.0001 
##  spanish  .        cc - ac              -4.70 3.71 1382 -1.268  1.0000 
##  spanish  .        cc - non_tcds        21.25 3.35 1362  6.351  <.0001 
##  spanish  .        ac - non_tcds        25.95 3.52 1370  7.378  <.0001 
##  .        books    english - spanish    23.82 8.83 1143  2.697  0.1136 
##  .        cc       english - spanish    13.28 4.57  247  2.906  0.0640 
##  .        ac       english - spanish    17.93 4.89  312  3.664  0.0047 
##  .        non_tcds english - spanish     4.83 4.35  205  1.112  1.0000 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: bonferroni method for 16 tests
# Bonferroni-adjusted 95% CIs for the same 16 contrasts
confint(rbind(pairs(m2_both_tokens_all_emmeans_simple1), pairs(m2_both_tokens_all_emmeans_simple2)))
##  language activity contrast          estimate   SE   df lower.CL upper.CL
##  english  .        books - cc           25.41 5.97 1407    7.751    43.07
##  english  .        books - ac           16.06 6.17 1419   -2.213    34.33
##  english  .        books - non_tcds     55.11 5.91 1408   37.610    72.61
##  english  .        cc - ac              -9.35 3.62 1376  -20.076     1.37
##  english  .        cc - non_tcds        29.70 3.24 1357   20.113    39.28
##  english  .        ac - non_tcds        39.05 3.52 1368   28.639    49.46
##  spanish  .        books - cc           14.87 6.75 1427   -5.108    34.86
##  spanish  .        books - ac           10.17 6.87 1433  -10.175    30.51
##  spanish  .        books - non_tcds     36.12 6.63 1424   16.500    55.74
##  spanish  .        cc - ac              -4.70 3.71 1382  -15.687     6.28
##  spanish  .        cc - non_tcds        21.25 3.35 1362   11.343    31.15
##  spanish  .        ac - non_tcds        25.95 3.52 1370   15.538    36.36
##  .        books    english - spanish    23.82 8.83 1143   -2.336    49.97
##  .        cc       english - spanish    13.28 4.57  247   -0.361    26.92
##  .        ac       english - spanish    17.93 4.89  312    3.357    32.50
##  .        non_tcds english - spanish     4.83 4.35  205   -8.164    17.82
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 16 estimates
# plot
# estimated marginal means with 95% CIs per activity, dodged by language
tokens_emmeans <- data.frame(emmeans(m2_both_tokens_all, ~ activity | language))


tokens <- ggplot(tokens_emmeans, aes(activity, emmean, colour = activity, shape = language, group = language)) + 
  # geom_line(aes(linetype = language)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2), 
                  size = 1) +
  scale_shape_manual(values = c(16, 21), 
                     name="Language",
                     labels=c('English', 'Spanish')) + 
  scale_color_manual(values=c("darkviolet", "firebrick1", "grey", "black"),
                     name="Activity",
                     labels=c('Books','Other Child-Cent','Adult-Cent', 'non-tCDS')) +
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = .9, hjust=.9)) +
  scale_x_discrete(labels = c('Books','Other Child-Cent','Adult-Cent', 'non-tCDS')) +
  labs(x = "", y = "EMM", title = "Tokens (rate per min)")

tokens

# pass the plot object explicitly; relying on ggsave()'s default
# (the last *displayed* plot) is fragile if the print above is removed
ggsave("./figures/emmeans_tokens_noline.pdf", plot = tokens, width = 6, height = 6, units = "in")


# model diagnostics
# only takes into account fixed effects, not random effects
# produces a list of diagnostic plots ([[1]]-[[4]] in the rendered output)
plot_model(m2_both_tokens_all, type = "diag") 
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

TYPES x ACTIVITY - BOTH - ALL TALK

# tried without the outlier and no change
# singular fit for MLU with segment_num as random intercept, so removed this for all models

# main-effects model for types/min; ML fit (REML = FALSE -- spelled out
# rather than the reassignable alias F) so the models can be LRT-compared
m1_both_types_all <- lmer(types_permin2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

# adds the language x activity interaction
m2_both_types_all <- lmer(types_permin2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

# see if adding the intx adds
# likelihood-ratio test: interaction vs main-effects model
anova(m1_both_types_all, m2_both_types_all)
## Data: freq_all_mm
## Models:
## m1_both_types_all: types_permin2 ~ tokens_hr_child + language + activity + (1 | 
## m1_both_types_all:     id)
## m2_both_types_all: types_permin2 ~ tokens_hr_child + (language * activity) + (1 | 
## m2_both_types_all:     id)
##                   npar   AIC   BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m1_both_types_all    8 13902 13944 -6942.9    13886                       
## m2_both_types_all   11 13898 13956 -6938.2    13876 9.3528  3    0.02495 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# Type III F-tests (Satterthwaite df) for the interaction model
anova(m2_both_types_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                   Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## tokens_hr_child    10217   10217     1   87.37 11.3797  0.001109 ** 
## language            6063    6063     1  168.75  6.7532  0.010186 *  
## activity          160506   53502     3 1392.65 59.5888 < 2.2e-16 ***
## language:activity   8427    2809     3 1392.38  3.1286  0.024896 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans
# interaction - post hoc tests of simple effects
# simple1: activity within each language; simple2: language within each activity
m2_both_types_all_emmeans_simple1 <- emmeans(m2_both_types_all, ~ activity | language)
m2_both_types_all_emmeans_simple2 <- emmeans(m2_both_types_all, ~ language | activity)

m2_both_types_all_emmeans_simple1
## language = english:
##  activity emmean   SE   df lower.CL upper.CL
##  books      29.2 4.58 1269     20.2     38.1
##  cc         23.9 2.17  406     19.6     28.1
##  ac         52.2 2.44  557     47.4     57.0
##  non_tcds   20.4 2.07  347     16.3     24.5
## 
## language = spanish:
##  activity emmean   SE   df lower.CL upper.CL
##  books      23.2 5.16 1283     13.1     33.4
##  cc         20.2 2.27  460     15.7     24.6
##  ac         38.2 2.44  547     33.5     43.0
##  non_tcds   18.5 2.07  350     14.4     22.5
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# pooled simple-effect contrasts; Bonferroni across the family of 16 tests
rbind(pairs(m2_both_types_all_emmeans_simple1), pairs(m2_both_types_all_emmeans_simple2))
##  language activity contrast          estimate   SE   df t.ratio p.value
##  english  .        books - cc            5.30 4.88 1438  1.086  1.0000 
##  english  .        books - ac          -23.04 5.03 1444 -4.579  0.0001 
##  english  .        books - non_tcds      8.77 4.83 1438  1.813  1.0000 
##  english  .        cc - ac             -28.34 2.98 1396 -9.499  <.0001 
##  english  .        cc - non_tcds         3.47 2.68 1361  1.296  1.0000 
##  english  .        ac - non_tcds        31.81 2.90 1383 10.964  <.0001 
##  spanish  .        books - cc            3.09 5.49 1443  0.563  1.0000 
##  spanish  .        books - ac          -15.00 5.58 1438 -2.689  0.1160 
##  spanish  .        books - non_tcds      4.79 5.40 1444  0.888  1.0000 
##  spanish  .        cc - ac             -18.09 3.05 1403 -5.926  <.0001 
##  spanish  .        cc - non_tcds         1.70 2.76 1371  0.616  1.0000 
##  spanish  .        ac - non_tcds        19.79 2.90 1385  6.823  <.0001 
##  .        books    english - spanish     5.92 6.90 1275  0.858  1.0000 
##  .        cc       english - spanish     3.71 3.15  433  1.178  1.0000 
##  .        ac       english - spanish    13.96 3.45  551  4.040  0.0010 
##  .        non_tcds english - spanish     1.94 2.93  348  0.663  1.0000 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: bonferroni method for 16 tests
# Bonferroni-adjusted 95% CIs for the same 16 contrasts
confint(rbind(pairs(m2_both_types_all_emmeans_simple1), pairs(m2_both_types_all_emmeans_simple2)))
##  language activity contrast          estimate   SE   df lower.CL upper.CL
##  english  .        books - cc            5.30 4.88 1438    -9.15    19.74
##  english  .        books - ac          -23.04 5.03 1444   -37.94    -8.14
##  english  .        books - non_tcds      8.77 4.83 1438    -5.54    23.08
##  english  .        cc - ac             -28.34 2.98 1396   -37.17   -19.51
##  english  .        cc - non_tcds         3.47 2.68 1361    -4.45    11.39
##  english  .        ac - non_tcds        31.81 2.90 1383    23.22    40.40
##  spanish  .        books - cc            3.09 5.49 1443   -13.16    19.34
##  spanish  .        books - ac          -15.00 5.58 1438   -31.51     1.51
##  spanish  .        books - non_tcds      4.79 5.40 1444   -11.19    20.77
##  spanish  .        cc - ac             -18.09 3.05 1403   -27.13    -9.05
##  spanish  .        cc - non_tcds         1.70 2.76 1371    -6.48     9.88
##  spanish  .        ac - non_tcds        19.79 2.90 1385    11.20    28.38
##  .        books    english - spanish     5.92 6.90 1275   -14.50    26.34
##  .        cc       english - spanish     3.71 3.15  433    -5.65    13.07
##  .        ac       english - spanish    13.96 3.45  551     3.70    24.21
##  .        non_tcds english - spanish     1.94 2.93  348    -6.77    10.66
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 16 estimates
# plot
# estimated marginal means with 95% CIs per activity, dodged by language
types_emmeans <- data.frame(emmeans(m2_both_types_all, ~ activity | language))



types <- ggplot(types_emmeans, aes(activity, emmean, colour = activity, shape = language, group = language)) + 
  # geom_line(aes(color = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2), 
                  size = 1) +
  scale_shape_manual(values = c(16, 21)) + 
  scale_color_manual(values=c("darkviolet", "firebrick1", "grey", "black")) + 
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = .9, hjust=.9)) +
  scale_x_discrete(labels = c('Books','Other Child-Cent','Adult-Cent', 'non-tCDS')) +
  labs(x = "", y = "EMM", title = "Types (rate per min)")

types

# pass the plot object explicitly; relying on ggsave()'s default
# (the last *displayed* plot) is fragile if the print above is removed
ggsave("./figures/emmeans_types_noline.pdf", plot = types, width = 6, height = 6, units = "in")


# model diagnostics
# only takes into account fixed effects, not random effects
# produces a list of diagnostic plots ([[1]]-[[4]] in the rendered output)
plot_model(m2_both_types_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

MLUw x ACTIVITY - BOTH - ALL TALK

# singular fit with segment_num as random intercept, so removed this
# main-effects model for MLUw; ML fit (REML = FALSE -- spelled out rather
# than the reassignable alias F) so the models can be LRT-compared
m1_both_mlu_all <- lmer(mlu_w2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = mlu_all_mm, REML = FALSE)

# adds the language x activity interaction
m2_both_mlu_all <- lmer(mlu_w2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = mlu_all_mm, REML = FALSE)

# see if adding the intx adds
# likelihood-ratio test: interaction vs main-effects model
anova(m1_both_mlu_all, m2_both_mlu_all)
## Data: mlu_all_mm
## Models:
## m1_both_mlu_all: mlu_w2 ~ tokens_hr_child + language + activity + (1 | id)
## m2_both_mlu_all: mlu_w2 ~ tokens_hr_child + (language * activity) + (1 | id)
##                 npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m1_both_mlu_all    8 3683.4 3724.8 -1833.7   3667.4                     
## m2_both_mlu_all   11 3686.7 3743.6 -1832.3   3664.7 2.6436  3     0.4499
# anova table for final model
# the interaction was not supported (LRT p = .45 above), so the
# main-effects model m1 is the final model here
anova(m1_both_mlu_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## tokens_hr_child  17.496  17.496     1   86.28  19.848 2.502e-05 ***
## language         42.151  42.151     1   88.09  47.818 7.114e-10 ***
## activity        179.377  59.792     3 1251.14  67.831 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans
# main effects only (no interaction in the final model): marginal means by
# activity (averaged over language) and by language (averaged over activity)
m1_both_mlu_all_emmeans_activity <- emmeans(m1_both_mlu_all, ~ activity)
m1_both_mlu_all_emmeans_language <- emmeans(m1_both_mlu_all, ~ language)

m1_both_mlu_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books      4.30 0.1169 959     4.07     4.53
##  cc         3.29 0.0638 200     3.17     3.42
##  ac         3.13 0.0682 254     2.99     3.26
##  non_tcds   3.90 0.0654 218     3.77     4.03
## 
## Results are averaged over the levels of: language 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# language marginal means (averaged over activity)
m1_both_mlu_all_emmeans_language
##  language emmean     SE  df lower.CL upper.CL
##  english    4.01 0.0771 106     3.86     4.17
##  spanish    3.30 0.0779 110     3.14     3.45
## 
## Results are averaged over the levels of: activity 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# pairwise activity comparisons (Tukey-adjusted, family of 4 estimates)
pairs(m1_both_mlu_all_emmeans_activity)
##  contrast         estimate     SE   df t.ratio p.value
##  books - cc          1.004 0.1166 1282   8.611 <.0001 
##  books - ac          1.170 0.1201 1290   9.744 <.0001 
##  books - non_tcds    0.398 0.1175 1282   3.386 0.0041 
##  cc - ac             0.166 0.0676 1246   2.451 0.0684 
##  cc - non_tcds      -0.606 0.0647 1239  -9.376 <.0001 
##  ac - non_tcds      -0.772 0.0685 1236 -11.261 <.0001 
## 
## Results are averaged over the levels of: language 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 4 estimates
# emmeans per language using model WITHOUT interactions
# (reported for completeness: with additive effects only, the activity
# contrasts are identical across languages, and the language contrast is
# identical across activities -- visible in the output below)
m1_both_mlu_all_emmeans_simple1 <- emmeans(m1_both_mlu_all, ~ activity | language)
m1_both_mlu_all_emmeans_simple2 <- emmeans(m1_both_mlu_all, ~ language | activity)

m1_both_mlu_all_emmeans_simple1
## language = english:
##  activity emmean     SE  df lower.CL upper.CL
##  books      4.66 0.1277 583     4.41     4.91
##  cc         3.65 0.0824 140     3.49     3.82
##  ac         3.49 0.0862 166     3.32     3.66
##  non_tcds   4.26 0.0839 150     4.09     4.42
## 
## language = spanish:
##  activity emmean     SE  df lower.CL upper.CL
##  books      3.94 0.1288 595     3.69     4.19
##  cc         2.93 0.0833 145     2.77     3.10
##  ac         2.77 0.0863 165     2.60     2.94
##  non_tcds   3.54 0.0842 151     3.37     3.71
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# pooled simple-effect contrasts; Bonferroni across the family of 16 tests
rbind(pairs(m1_both_mlu_all_emmeans_simple1), pairs(m1_both_mlu_all_emmeans_simple2))
##  language activity contrast          estimate     SE     df t.ratio p.value
##  english  .        books - cc           1.004 0.1166 1281.7   8.611 <.0001 
##  english  .        books - ac           1.170 0.1201 1289.7   9.744 <.0001 
##  english  .        books - non_tcds     0.398 0.1175 1282.1   3.386 0.0117 
##  english  .        cc - ac              0.166 0.0676 1246.1   2.451 0.2303 
##  english  .        cc - non_tcds       -0.606 0.0647 1238.5  -9.376 <.0001 
##  english  .        ac - non_tcds       -0.772 0.0685 1235.5 -11.261 <.0001 
##  spanish  .        books - cc           1.004 0.1166 1281.7   8.611 <.0001 
##  spanish  .        books - ac           1.170 0.1201 1289.7   9.744 <.0001 
##  spanish  .        books - non_tcds     0.398 0.1175 1282.1   3.386 0.0117 
##  spanish  .        cc - ac              0.166 0.0676 1246.1   2.451 0.2303 
##  spanish  .        cc - non_tcds       -0.606 0.0647 1238.5  -9.376 <.0001 
##  spanish  .        ac - non_tcds       -0.772 0.0685 1235.5 -11.261 <.0001 
##  .        books    english - spanish    0.718 0.1057   93.2   6.794 <.0001 
##  .        cc       english - spanish    0.718 0.1057   93.2   6.794 <.0001 
##  .        ac       english - spanish    0.718 0.1057   93.2   6.794 <.0001 
##  .        non_tcds english - spanish    0.718 0.1057   93.2   6.794 <.0001 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: bonferroni method for 16 tests
# Bonferroni-adjusted 95% CIs for the same 16 contrasts
confint(rbind(pairs(m1_both_mlu_all_emmeans_simple1), pairs(m1_both_mlu_all_emmeans_simple2)))
##  language activity contrast          estimate     SE     df lower.CL upper.CL
##  english  .        books - cc           1.004 0.1166 1281.7   0.6588    1.349
##  english  .        books - ac           1.170 0.1201 1289.7   0.8144    1.525
##  english  .        books - non_tcds     0.398 0.1175 1282.1   0.0500    0.746
##  english  .        cc - ac              0.166 0.0676 1246.1  -0.0345    0.366
##  english  .        cc - non_tcds       -0.606 0.0647 1238.5  -0.7976   -0.415
##  english  .        ac - non_tcds       -0.772 0.0685 1235.5  -0.9749   -0.569
##  spanish  .        books - cc           1.004 0.1166 1281.7   0.6588    1.349
##  spanish  .        books - ac           1.170 0.1201 1289.7   0.8144    1.525
##  spanish  .        books - non_tcds     0.398 0.1175 1282.1   0.0500    0.746
##  spanish  .        cc - ac              0.166 0.0676 1246.1  -0.0345    0.366
##  spanish  .        cc - non_tcds       -0.606 0.0647 1238.5  -0.7976   -0.415
##  spanish  .        ac - non_tcds       -0.772 0.0685 1235.5  -0.9749   -0.569
##  .        books    english - spanish    0.718 0.1057   93.2   0.3973    1.039
##  .        cc       english - spanish    0.718 0.1057   93.2   0.3973    1.039
##  .        ac       english - spanish    0.718 0.1057   93.2   0.3973    1.039
##  .        non_tcds english - spanish    0.718 0.1057   93.2   0.3973    1.039
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 16 estimates
# plot
# estimated marginal means with 95% CIs per activity, dodged by language
mlu_emmeans <- data.frame(emmeans(m1_both_mlu_all, ~ activity | language))


# NOTE: this assignment shadows the `mlu` data frame created earlier; kept
# for compatibility, but a distinct name (e.g. mlu_plot) would be safer
mlu <- ggplot(mlu_emmeans, aes(activity, emmean, colour = activity, shape = language, group = language)) + 
  # geom_line(aes(color = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2), 
                  size = 1) +
  scale_shape_manual(values = c(16, 21)) + 
  scale_color_manual(values=c("darkviolet", "firebrick1", "grey", "black")) + 
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = .9, hjust=.9)) +
  scale_x_discrete(labels = c('Books','Other Child-Cent','Adult-Cent', 'non-tCDS')) +
  labs(x = "", y = "EMM", title = "MLUw")

# BUG FIX: unlike the tokens/types sections, `mlu` was never printed before
# ggsave(), and ggsave() defaults to the last *displayed* plot -- so the
# previous figure was being written to this file. Pass the plot explicitly.
ggsave("./figures/emmeans_mlu_noline.pdf", plot = mlu, width = 6, height = 6, units = "in")


# model diagnostics
# only takes into account fixed effects, not random effects
# produces a list of diagnostic plots ([[1]]-[[4]] in the rendered output)
plot_model(m1_both_mlu_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

PROP RESPONSES x ACTIVITY - BOTH - ALL TALK

# main-effects model for caregiver response rate; ML fit (REML = FALSE --
# spelled out rather than the reassignable alias F) so the models can be
# LRT-compared. chip has no non_tcds level, so activity has 3 levels here.
m1_both_propresp_all <- lmer(prop_adultresp2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

# adds the language x activity interaction
m2_both_propresp_all <- lmer(prop_adultresp2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

# see if adding the intx adds
# likelihood-ratio test: interaction vs main-effects model
anova(m1_both_propresp_all, m2_both_propresp_all)
## Data: chip_mm
## Models:
## m1_both_propresp_all: prop_adultresp2 ~ tokens_hr_child + language + activity + (1 | 
## m1_both_propresp_all:     id)
## m2_both_propresp_all: prop_adultresp2 ~ tokens_hr_child + (language * activity) + (1 | 
## m2_both_propresp_all:     id)
##                      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m1_both_propresp_all    7 1884.8 1917.8 -935.43   1870.8                       
## m2_both_propresp_all    9 1882.2 1924.7 -932.12   1864.2 6.6035  2    0.03682 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# anova table for final model
# Type III F-tests (Satterthwaite df) for the interaction model
anova(m2_both_propresp_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## tokens_hr_child   12.3461 12.3461     1  83.90 23.7737 5.073e-06 ***
## language           3.1791  3.1791     1 135.49  6.1218   0.01459 *  
## activity          18.0184  9.0092     2 801.96 17.3481 4.209e-08 ***
## language:activity  3.4434  1.7217     2 801.53  3.3153   0.03682 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans
# interaction - post hoc tests of simple effects
# simple1: activity within each language; simple2: language within each activity
m2_both_propresp_all_emmeans_simple1 <- emmeans(m2_both_propresp_all, ~ activity | language)
m2_both_propresp_all_emmeans_simple2 <- emmeans(m2_both_propresp_all, ~ language | activity)

m2_both_propresp_all_emmeans_simple1
## language = english:
##  activity emmean     SE  df lower.CL upper.CL
##  books      2.44 0.1174 659     2.21     2.67
##  cc         1.90 0.0619 168     1.78     2.02
##  ac         1.88 0.0727 277     1.74     2.02
## 
## language = spanish:
##  activity emmean     SE  df lower.CL upper.CL
##  books      2.14 0.1322 690     1.88     2.39
##  cc         1.87 0.0644 191     1.74     1.99
##  ac         1.59 0.0715 264     1.45     1.73
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
rbind(pairs(m2_both_propresp_all_emmeans_simple1), pairs(m2_both_propresp_all_emmeans_simple2))
##  language activity contrast          estimate     SE  df t.ratio p.value
##  english  .        books - cc          0.5444 0.1204 816 4.522   0.0001 
##  english  .        books - ac          0.5623 0.1282 829 4.385   0.0001 
##  english  .        cc - ac             0.0180 0.0781 796 0.230   1.0000 
##  spanish  .        books - cc          0.2691 0.1367 829 1.969   0.4436 
##  spanish  .        books - ac          0.5491 0.1413 830 3.886   0.0010 
##  spanish  .        cc - ac             0.2799 0.0788 789 3.551   0.0037 
##  .        books    english - spanish   0.3061 0.1767 675 1.732   0.7539 
##  .        cc       english - spanish   0.0309 0.0894 180 0.345   1.0000 
##  .        ac       english - spanish   0.2929 0.1020 270 2.872   0.0396 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: bonferroni method for 9 tests
confint(rbind(pairs(m2_both_propresp_all_emmeans_simple1), pairs(m2_both_propresp_all_emmeans_simple2)))
##  language activity contrast          estimate     SE  df lower.CL upper.CL
##  english  .        books - cc          0.5444 0.1204 816  0.20963    0.879
##  english  .        books - ac          0.5623 0.1282 829  0.20577    0.919
##  english  .        cc - ac             0.0180 0.0781 796 -0.19930    0.235
##  spanish  .        books - cc          0.2691 0.1367 829 -0.11088    0.649
##  spanish  .        books - ac          0.5491 0.1413 830  0.15622    0.942
##  spanish  .        cc - ac             0.2799 0.0788 789  0.06073    0.499
##  .        books    english - spanish   0.3061 0.1767 675 -0.18559    0.798
##  .        cc       english - spanish   0.0309 0.0894 180 -0.22005    0.282
##  .        ac       english - spanish   0.2929 0.1020 270  0.00782    0.578
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 9 estimates
# plot: EMMs (with 95% CIs) of proportion of responses, by activity and language
propresp_emmeans <- data.frame(emmeans(m2_both_propresp_all, ~ activity | language))



resp <- ggplot(propresp_emmeans, aes(activity, emmean, colour = activity, shape = language, group = language)) + 
  # geom_line(aes(color = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2), 
                  size = 1) +
  scale_shape_manual(values = c(16, 21)) + 
  scale_color_manual(values=c("darkviolet", "firebrick1", "grey", "black")) + 
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = .9, hjust=.9)) +
  scale_x_discrete(labels = c('Books','Other Child-Cent','Adult-Cent')) +
  labs(x = "", y = "EMM", title = "Proportion of Responses")

# pass the plot explicitly: `resp` is assigned but never printed before this
# call, so relying on ggsave()'s default last_plot() could silently save a
# stale figure (e.g., the plot_model() diagnostics displayed earlier)
ggsave("./figures/emmeans_resp_noline.pdf", plot = resp, width = 6, height = 6, units = "in")



# model diagnostics via sjPlot::plot_model(type = "diag")
# only takes into account fixed effects, not random effects
# NOTE(review): diagnostics are run on the additive model m1, but the model
# retained above (interaction significant) is m2_both_propresp_all — confirm
# this is intentional
plot_model(m1_both_propresp_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

PROP IMIT/EXP x ACTIVITY - BOTH - ALL TALK

# # tried removing two outliers, but no change in findings
# chip_mm2 <- chip_mm %>% 
#   filter(prop_adult_imitexp_avg < 2.9)

# Additive model: child talk covariate + language and activity main effects,
# random intercept per child (id). Fit with ML (REML = FALSE, not the
# reassignable shorthand F) so the LRT against the interaction model is valid.
m1_both_propimitexp_all <- lmer(prop_adult_imitexp2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

# Same model plus the language x activity interaction
m2_both_propimitexp_all <- lmer(prop_adult_imitexp2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

# Likelihood-ratio test: does adding the language x activity interaction
# improve fit? (Here it does not, so the additive model m1 is retained.)
anova(m1_both_propimitexp_all, m2_both_propimitexp_all)
## Data: chip_mm
## Models:
## m1_both_propimitexp_all: prop_adult_imitexp2 ~ tokens_hr_child + language + activity + 
## m1_both_propimitexp_all:     (1 | id)
## m2_both_propimitexp_all: prop_adult_imitexp2 ~ tokens_hr_child + (language * activity) + 
## m2_both_propimitexp_all:     (1 | id)
##                         npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m1_both_propimitexp_all    7 456.64 489.63 -221.32   442.64                     
## m2_both_propimitexp_all    9 458.71 501.12 -220.35   440.71 1.9303  2     0.3809
# ANOVA table for the final (additive) model
anova(m1_both_propimitexp_all)
## Type III Analysis of Variance Table with Satterthwaite's method
##                  Sum Sq Mean Sq NumDF  DenDF F value   Pr(>F)   
## tokens_hr_child 0.13640 0.13640     1  86.42  1.4590 0.230382   
## language        0.11961 0.11961     1  86.58  1.2794 0.261126   
## activity        0.98600 0.49300     2 804.92  5.2736 0.005304 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# emmeans: main-effect EMMs from the additive model (interaction was n.s.)
m1_both_propimitexp_all_emmeans_activity <- emmeans(m1_both_propimitexp_all, ~ activity)
m1_both_propimitexp_all_emmeans_language <- emmeans(m1_both_propimitexp_all, ~ language)

m1_both_propimitexp_all_emmeans_activity
##  activity emmean     SE  df lower.CL upper.CL
##  books     0.448 0.0366 683    0.376    0.520
##  cc        0.390 0.0181 189    0.354    0.426
##  ac        0.333 0.0209 289    0.292    0.374
## 
## Results are averaged over the levels of: language 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
m1_both_propimitexp_all_emmeans_language
##  language emmean     SE  df lower.CL upper.CL
##  english   0.407 0.0227 118    0.362    0.452
##  spanish   0.374 0.0233 127    0.328    0.420
## 
## Results are averaged over the levels of: activity 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# tukey-adjusted pairwise contrasts among activities
pairs(m1_both_propimitexp_all_emmeans_activity)
##  contrast   estimate     SE  df t.ratio p.value
##  books - cc   0.0581 0.0381 825 1.526   0.2793 
##  books - ac   0.1151 0.0400 828 2.880   0.0114 
##  cc - ac      0.0569 0.0234 795 2.428   0.0407 
## 
## Results are averaged over the levels of: language 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates
# emmeans per language using the model WITHOUT the interaction (for plotting
# alongside the other outcomes); because the model is additive, the activity
# contrasts are identical across languages and the language contrast is
# identical across activities (visible in the repeated estimates below)
m1_both_propimitexp_all_emmeans_simple1 <- emmeans(m1_both_propimitexp_all, ~ activity | language)
m1_both_propimitexp_all_emmeans_simple2 <- emmeans(m1_both_propimitexp_all, ~ language | activity)

m1_both_propimitexp_all_emmeans_simple1
## language = english:
##  activity emmean     SE  df lower.CL upper.CL
##  books     0.465 0.0393 500    0.388    0.542
##  cc        0.407 0.0232 132    0.361    0.453
##  ac        0.350 0.0259 189    0.299    0.401
## 
## language = spanish:
##  activity emmean     SE  df lower.CL upper.CL
##  books     0.431 0.0400 514    0.353    0.510
##  cc        0.373 0.0239 144    0.326    0.421
##  ac        0.316 0.0257 183    0.266    0.367
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95
# all 9 contrasts as one bonferroni-adjusted family
rbind(pairs(m1_both_propimitexp_all_emmeans_simple1), pairs(m1_both_propimitexp_all_emmeans_simple2))
##  language activity contrast          estimate     SE    df t.ratio p.value
##  english  .        books - cc          0.0581 0.0381 824.7 1.526   1.0000 
##  english  .        books - ac          0.1151 0.0400 828.0 2.880   0.0367 
##  english  .        cc - ac             0.0569 0.0234 795.3 2.428   0.1385 
##  spanish  .        books - cc          0.0581 0.0381 824.7 1.526   1.0000 
##  spanish  .        books - ac          0.1151 0.0400 828.0 2.880   0.0367 
##  spanish  .        cc - ac             0.0569 0.0234 795.3 2.428   0.1385 
##  .        books    english - spanish   0.0335 0.0301  92.7 1.110   1.0000 
##  .        cc       english - spanish   0.0335 0.0301  92.7 1.110   1.0000 
##  .        ac       english - spanish   0.0335 0.0301  92.7 1.110   1.0000 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: bonferroni method for 9 tests
# bonferroni-adjusted 95% CIs for the same contrasts
confint(rbind(pairs(m1_both_propimitexp_all_emmeans_simple1), pairs(m1_both_propimitexp_all_emmeans_simple2)))
##  language activity contrast          estimate     SE    df lower.CL upper.CL
##  english  .        books - cc          0.0581 0.0381 824.7 -0.04780    0.164
##  english  .        books - ac          0.1151 0.0400 828.0  0.00399    0.226
##  english  .        cc - ac             0.0569 0.0234 795.3 -0.00826    0.122
##  spanish  .        books - cc          0.0581 0.0381 824.7 -0.04780    0.164
##  spanish  .        books - ac          0.1151 0.0400 828.0  0.00399    0.226
##  spanish  .        cc - ac             0.0569 0.0234 795.3 -0.00826    0.122
##  .        books    english - spanish   0.0335 0.0301  92.7 -0.05213    0.119
##  .        cc       english - spanish   0.0335 0.0301  92.7 -0.05213    0.119
##  .        ac       english - spanish   0.0335 0.0301  92.7 -0.05213    0.119
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 9 estimates
# plot: EMMs (with 95% CIs) of proportion of imitations/expansions,
# by activity and language
propimitexp_emmeans <- data.frame(emmeans(m1_both_propimitexp_all, ~ activity | language))


imit_exp <- ggplot(propimitexp_emmeans, aes(activity, emmean, colour = activity, shape = language, group = language)) + 
  # geom_line(aes(color = activity)) +
  geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL),
                  position = position_dodge(width = 0.2), 
                  size = 1) +
  scale_shape_manual(values = c(16, 21)) + 
  scale_color_manual(values=c("darkviolet", "firebrick1", "grey", "black")) + 
  theme(legend.position = "none",
        text = element_text(size = 20),
        axis.text.x = element_text(angle = 40, vjust = .9, hjust=.9)) +
  scale_x_discrete(labels = c('Books','Other Child-Cent','Adult-Cent')) +
  labs(x = "", y = "EMM", title = "Proportion of Imitations/Expansions")

# pass the plot explicitly: `imit_exp` is assigned but never printed before
# this call, so ggsave()'s default last_plot() could save the wrong figure
ggsave("./figures/emmeans_imit_exp_noline.pdf", plot = imit_exp, width = 6, height = 6, units = "in")


# model diagnostics via sjPlot::plot_model(type = "diag") on the retained
# additive model
# only takes into account fixed effects, not random effects
plot_model(m1_both_propimitexp_all, type = "diag")
## [[1]]

## 
## [[2]]
## [[2]]$id

## 
## 
## [[3]]

## 
## [[4]]

Combining plots (NO LINE)

# grid all five EMM panels (tokens, types, mlu, resp, imit_exp) with a
# shared legend at the bottom; TRUE spelled out (T is reassignable)
grid_all <- ggarrange(tokens, types, mlu, resp, imit_exp, common.legend = TRUE, legend = "bottom")
grid_all

# save the grid explicitly rather than relying on ggsave()'s last_plot() default
ggsave("./figures/emmeans_grid_all_noline.pdf", plot = grid_all, width = 14, height = 10, units = "in", dpi = 300)

Checking HI as Covariate

TOKENS x ACTIVITY - BOTH - ALL TALK - WITH HI

# comparing models with hi entered after the interaction
# singular fit for MLU with segment_num as random intercept, so segment_num
# was removed for all models (kept consistent across outcomes); ML fits
# (REML = FALSE, not the reassignable shorthand F) so LRTs below are valid
m1_both_tokens_all_hi <- lmer(tokens_permin2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

m2_both_tokens_all_hi <- lmer(tokens_permin2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

m3_both_tokens_all_hi <- lmer(tokens_permin2 ~ tokens_hr_child + (language * activity) + hi +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

# see if adding hi after the intx adds (original comment said "mom ed", but
# the covariate entered in m3 is hi)
anova(m1_both_tokens_all_hi, m2_both_tokens_all_hi, m3_both_tokens_all_hi)
## Data: freq_all_mm
## Models:
## m1_both_tokens_all_hi: tokens_permin2 ~ tokens_hr_child + language + activity + (1 | 
## m1_both_tokens_all_hi:     id)
## m2_both_tokens_all_hi: tokens_permin2 ~ tokens_hr_child + (language * activity) + (1 | 
## m2_both_tokens_all_hi:     id)
## m3_both_tokens_all_hi: tokens_permin2 ~ tokens_hr_child + (language * activity) + hi + 
## m3_both_tokens_all_hi:     (1 | id)
##                       npar   AIC   BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m1_both_tokens_all_hi    8 14508 14550 -7246.2    14492                       
## m2_both_tokens_all_hi   11 14504 14562 -7241.3    14482 9.8635  3    0.01976 *
## m3_both_tokens_all_hi   12 14506 14570 -7241.2    14482 0.1263  1    0.72228  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the model including the interaction and hi
anova(m3_both_tokens_all_hi)
## Type III Analysis of Variance Table with Satterthwaite's method
##                   Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## tokens_hr_child    18658   18658     1   89.40 14.2257 0.0002908 ***
## language           17357   17357     1  123.81 13.2333 0.0004021 ***
## activity          334012  111337     3 1379.16 84.8874 < 2.2e-16 ***
## hi                   166     166     1   89.36  0.1264 0.7229869    
## language:activity  12987    4329     3 1379.30  3.3006 0.0197043 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TYPES x ACTIVITY - BOTH - ALL TALK - WITH HI

# tried without the outlier and no change
# singular fit for MLU with segment_num as random intercept, so segment_num
# removed for all models; ML fits (REML = FALSE) so LRTs below are valid

m1_both_types_all_hi <- lmer(types_permin2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

m2_both_types_all_hi <- lmer(types_permin2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

m3_both_types_all_hi <- lmer(types_permin2 ~ tokens_hr_child + (language * activity) + hi +
                                (1 | id),
                                data = freq_all_mm, REML = FALSE)

# see if adding hi after the intx adds (original comment said "mom ed", but
# the covariate entered in m3 is hi)
anova(m1_both_types_all_hi, m2_both_types_all_hi, m3_both_types_all_hi)
## Data: freq_all_mm
## Models:
## m1_both_types_all_hi: types_permin2 ~ tokens_hr_child + language + activity + (1 | 
## m1_both_types_all_hi:     id)
## m2_both_types_all_hi: types_permin2 ~ tokens_hr_child + (language * activity) + (1 | 
## m2_both_types_all_hi:     id)
## m3_both_types_all_hi: types_permin2 ~ tokens_hr_child + (language * activity) + hi + 
## m3_both_types_all_hi:     (1 | id)
##                      npar   AIC   BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m1_both_types_all_hi    8 13902 13944 -6942.9    13886                       
## m2_both_types_all_hi   11 13898 13956 -6938.2    13876 9.3528  3    0.02495 *
## m3_both_types_all_hi   12 13900 13964 -6938.2    13876 0.0363  1    0.84898  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the model including the interaction and hi
anova(m3_both_types_all_hi)
## Type III Analysis of Variance Table with Satterthwaite's method
##                   Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## tokens_hr_child     9738    9738     1   87.52 10.8466  0.001429 ** 
## language            4729    4729     1  153.03  5.2672  0.023089 *  
## activity          160531   53510     3 1392.87 59.6000 < 2.2e-16 ***
## hi                    33      33     1   87.72  0.0363  0.849423    
## language:activity   8447    2816     3 1392.18  3.1360  0.024647 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

MLUw x ACTIVITY - BOTH - ALL TALK - WITH HI

# singular fit with segment_num as random intercept, so segment_num removed;
# ML fits (REML = FALSE) so the LRTs below are valid
m1_both_mlu_all_hi <- lmer(mlu_w2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = mlu_all_mm, REML = FALSE)

m2_both_mlu_all_hi <- lmer(mlu_w2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = mlu_all_mm, REML = FALSE)

m3_both_mlu_all_hi <- lmer(mlu_w2 ~ tokens_hr_child + (language * activity) + hi +
                                (1 | id),
                                data = mlu_all_mm, REML = FALSE)

# see if adding hi after the intx adds (original comment said "mom ed", but
# the covariate entered in m3 is hi)
anova(m1_both_mlu_all_hi, m2_both_mlu_all_hi, m3_both_mlu_all_hi)
## Data: mlu_all_mm
## Models:
## m1_both_mlu_all_hi: mlu_w2 ~ tokens_hr_child + language + activity + (1 | id)
## m2_both_mlu_all_hi: mlu_w2 ~ tokens_hr_child + (language * activity) + (1 | id)
## m3_both_mlu_all_hi: mlu_w2 ~ tokens_hr_child + (language * activity) + hi + (1 | 
## m3_both_mlu_all_hi:     id)
##                    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m1_both_mlu_all_hi    8 3683.4 3724.8 -1833.7   3667.4                     
## m2_both_mlu_all_hi   11 3686.7 3743.6 -1832.3   3664.7 2.6436  3     0.4499
## m3_both_mlu_all_hi   12 3688.4 3750.5 -1832.2   3664.4 0.2936  1     0.5879
# ANOVA table for the model including the interaction and hi
anova(m3_both_mlu_all_hi)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq Mean Sq NumDF   DenDF F value    Pr(>F)    
## tokens_hr_child    16.606  16.606     1   86.33 18.8694  3.80e-05 ***
## language           32.530  32.530     1  111.85 36.9639  1.71e-08 ***
## activity          175.612  58.537     3 1251.43 66.5171 < 2.2e-16 ***
## hi                  0.259   0.259     1   87.31  0.2941    0.5890    
## language:activity   2.304   0.768     3 1251.71  0.8727    0.4546    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PROP RESPONSES x ACTIVITY - BOTH - ALL TALK - WITH HI

# Proportion-of-responses models with hi entered after the interaction;
# ML fits (REML = FALSE) so the LRTs below are valid
m1_both_propresp_all_hi <- lmer(prop_adultresp2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

m2_both_propresp_all_hi <- lmer(prop_adultresp2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

m3_both_propresp_all_hi <- lmer(prop_adultresp2 ~ tokens_hr_child + (language * activity) + hi +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

# see if adding hi after the intx adds (original comment said "mom ed", but
# the covariate entered in m3 is hi)
anova(m1_both_propresp_all_hi, m2_both_propresp_all_hi, m3_both_propresp_all_hi)
## Data: chip_mm
## Models:
## m1_both_propresp_all_hi: prop_adultresp2 ~ tokens_hr_child + language + activity + (1 | 
## m1_both_propresp_all_hi:     id)
## m2_both_propresp_all_hi: prop_adultresp2 ~ tokens_hr_child + (language * activity) + (1 | 
## m2_both_propresp_all_hi:     id)
## m3_both_propresp_all_hi: prop_adultresp2 ~ tokens_hr_child + (language * activity) + hi + 
## m3_both_propresp_all_hi:     (1 | id)
##                         npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## m1_both_propresp_all_hi    7 1884.8 1917.8 -935.43   1870.8                       
## m2_both_propresp_all_hi    9 1882.2 1924.7 -932.12   1864.2 6.6035  2    0.03682 *
## m3_both_propresp_all_hi   10 1883.3 1930.4 -931.64   1863.3 0.9754  1    0.32333  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the model including the interaction and hi
anova(m3_both_propresp_all_hi)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## tokens_hr_child   11.4097 11.4097     1  83.05 21.9660 1.074e-05 ***
## language           3.7108  3.7108     1 125.12  7.1440  0.008524 ** 
## activity          18.3616  9.1808     2 801.92 17.6749 3.078e-08 ***
## hi                 0.5101  0.5101     1  81.40  0.9821  0.324620    
## language:activity  3.4075  1.7037     2 801.05  3.2801  0.038132 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PROP IMIT/EXP x ACTIVITY - BOTH - ALL TALK - WITH HI

# Proportion-of-imitations/expansions models with hi entered after the
# interaction; ML fits (REML = FALSE) so the LRTs below are valid
m1_both_propimitexp_all_hi <- lmer(prop_adult_imitexp2 ~ tokens_hr_child + language + activity +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

m2_both_propimitexp_all_hi <- lmer(prop_adult_imitexp2 ~ tokens_hr_child + (language * activity) +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

m3_both_propimitexp_all_hi <- lmer(prop_adult_imitexp2 ~ tokens_hr_child + (language * activity) + hi +
                                (1 | id),
                                data = chip_mm, REML = FALSE)

# see if adding hi after the intx adds (original comment said "mom ed", but
# the covariate entered in m3 is hi)
anova(m1_both_propimitexp_all_hi, m2_both_propimitexp_all_hi, m3_both_propimitexp_all_hi)
## Data: chip_mm
## Models:
## m1_both_propimitexp_all_hi: prop_adult_imitexp2 ~ tokens_hr_child + language + activity + 
## m1_both_propimitexp_all_hi:     (1 | id)
## m2_both_propimitexp_all_hi: prop_adult_imitexp2 ~ tokens_hr_child + (language * activity) + 
## m2_both_propimitexp_all_hi:     (1 | id)
## m3_both_propimitexp_all_hi: prop_adult_imitexp2 ~ tokens_hr_child + (language * activity) + 
## m3_both_propimitexp_all_hi:     hi + (1 | id)
##                            npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m1_both_propimitexp_all_hi    7 456.64 489.63 -221.32   442.64                     
## m2_both_propimitexp_all_hi    9 458.71 501.12 -220.35   440.71 1.9303  2     0.3809
## m3_both_propimitexp_all_hi   10 459.03 506.15 -219.51   439.03 1.6806  1     0.1948
# ANOVA table for the model including the interaction and hi
anova(m3_both_propimitexp_all_hi)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq Mean Sq NumDF  DenDF F value   Pr(>F)   
## tokens_hr_child   0.17684 0.17684     1  85.85  1.8957 0.172139   
## language          0.09446 0.09446     1 133.68  1.0126 0.316109   
## activity          0.89726 0.44863     2 805.51  4.8091 0.008391 **
## hi                0.15810 0.15810     1  84.07  1.6948 0.196525   
## language:activity 0.16891 0.08445     2 804.34  0.9053 0.404827   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1